To receive full credit, you’ll need to deliver on all of the items in the checklist below. Please read carefully through this checklist before you make your project proposal. You are (within these checklist constraints) strongly urged to limit scope and make the necessary simplifying assumptions so that you can deliver your work on time!
Goal: My objective in this project is seeking to understand the relationship between population growth and the crime rates in the United States between the year 1994 to 2018 and to comment and conclude on the overal pattern.
To analyze the crime rates in the US for the period from 1994 to 2013, we will use the data set provided by the FBI. See link: https://ucr.fbi.gov/crime-in-the-u.s/2013/crime-in-the-u.s.-2013/tables/1tabledatadecoverviewpdf/table_1_crime_in_the_united_states_by_volume_and_rate_per_100000_inhabitants_1994-2013.xls
I downloaded data for the period 1994 - 2018 and saved it as a .csv file in my local drive and later uploaded it into my github. DATA: https://raw.githubusercontent.com/igukusamuel/DATA-607-Final-Project/master/CrimeData.cs
library(ggplot2)
library(XML)
library(RCurl)
library(knitr)
library(dplyr)
library(plyr)
library(tidyr)
library(plotly)
library(tidyverse)
library(tidyselect)
library(data.table)
library(readxl)
library(fBasics)#Reading the dataset from my Github repository
crime_data <- read_csv("https://raw.githubusercontent.com/igukusamuel/DATA-607-Final-Project/master/CrimeData.csv")
#To analyze the data frame in detail uncomment the following lines of code.
#Observing the dataset
#glimpse(crime_data)
#str(crime_data)
#summary(crime_data)
#Confirm data was loaded correctly
head(crime_data, 2)## # A tibble: 2 x 24
## Year Population Violent_crime Violent_crime_r~ Murder_and_nonn~
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1994 260327021 1857670 714. 23326
## 2 1995 262803276 1798792 684. 21606
## # ... with 19 more variables:
## # Murder_and_nonnegligent_manslaughter_rate <dbl>, Rape <dbl>,
## # Rape_rate <dbl>, Robbery <dbl>, Robbery_rate <dbl>,
## # Aggravated_assault <dbl>, Aggravated_assault_rate <dbl>,
## # Property_crime <dbl>, Property_crime_rate <dbl>, Burglary <dbl>,
## # Burglary_rate <dbl>, Larceny_theft <dbl>, Larceny_theft_rate <dbl>,
## # Motor_vehicle_theft <dbl>, Motor_vehicle_theft_rate <dbl>, X21 <lgl>,
## # `_` <lgl>, X23 <lgl>, X24 <lgl>
#Select columns relevant to our analysis [1:20 only].
#Note Column 21:24 are empty
crime_data <- crime_data[, 1:20]
# Add a column on % Crime Rate (Total crimes / Total Population)
crime_data <- crime_data %>% mutate(PercentageCrimeRate = (
Violent_crime + Murder_and_nonnegligent_manslaughter +
Rape + Robbery + Aggravated_assault + Larceny_theft +
Property_crime + Burglary +
Motor_vehicle_theft) / Population * 100
)
# Add a column on Average Crime Rate (Total crimes / N (no of crime classifications))
crime_data %>% mutate(AverageCrimeRate = (
Violent_crime_rate + Murder_and_nonnegligent_manslaughter_rate +
Rape_rate + Robbery_rate + Aggravated_assault_rate + Larceny_theft_rate +
Property_crime_rate + Burglary_rate +
Motor_vehicle_theft_rate) / 9
) -> crime_data
# Print out all column names to confirm the two just added
names(crime_data)## [1] "Year"
## [2] "Population"
## [3] "Violent_crime"
## [4] "Violent_crime_rate"
## [5] "Murder_and_nonnegligent_manslaughter"
## [6] "Murder_and_nonnegligent_manslaughter_rate"
## [7] "Rape"
## [8] "Rape_rate"
## [9] "Robbery"
## [10] "Robbery_rate"
## [11] "Aggravated_assault"
## [12] "Aggravated_assault_rate"
## [13] "Property_crime"
## [14] "Property_crime_rate"
## [15] "Burglary"
## [16] "Burglary_rate"
## [17] "Larceny_theft"
## [18] "Larceny_theft_rate"
## [19] "Motor_vehicle_theft"
## [20] "Motor_vehicle_theft_rate"
## [21] "PercentageCrimeRate"
## [22] "AverageCrimeRate"
populationGrowth <-
ggplot(crime_data, aes(Year, Population, group = 1)) +
geom_line(linetype = "dashed", color = "red") +
geom_point()+
ggtitle("Population Size Between 1994 - 2018") +
xlab("Years") + ylab("Population") +
theme(
plot.title = element_text(color="blue", size=15, face="bold.italic"),
axis.text.x = element_text(angle=60, hjust=1),
axis.title.x = element_text(color="blue", size=15, face="bold"),
axis.title.y = element_text(color="blue", size=15, face="bold")
)
ggplotly(populationGrowth)ViolentCrimeRate <-
ggplot(crime_data, aes(Year, Violent_crime_rate, group = 1)) +
geom_line(linetype = "dashed", color = "red") +
geom_point()+
ggtitle("Violent crime rate per 100,000 between 1994 - 2018") +
xlab("Years") + ylab("Violent_crime") +
theme(
plot.title = element_text(color="blue", size=15, face="bold.italic"),
axis.text.x = element_text(angle=60, hjust=1),
axis.title.x = element_text(color="blue", size=15, face="bold"),
axis.title.y = element_text(color="blue", size=15, face="bold")
)
ggplotly(ViolentCrimeRate)Murder_and_nonnegligent_manslaughter_rate <-
ggplot(crime_data, aes(Year, Murder_and_nonnegligent_manslaughter_rate, group = 1)) +
geom_line(linetype = "dashed", color = "red") +
geom_point()+
ggtitle("Rape Rate per 100,000 between 1994 - 2018") +
xlab("Years") + ylab("Rate_rate") +
theme(
plot.title = element_text(color="blue", size=15, face="bold.italic"),
axis.text.x = element_text(angle=60, hjust=1),
axis.title.x = element_text(color="blue", size=15, face="bold"),
axis.title.y = element_text(color="blue", size=15, face="bold")
)
ggplotly(Murder_and_nonnegligent_manslaughter_rate)Rape_rate <-
ggplot(crime_data, aes(Year, Rape_rate, group = 1)) +
geom_line(linetype = "dashed", color = "red") +
geom_point()+
ggtitle("Rape Rate per 100,000 between 1994 - 2018") +
xlab("Years") + ylab("Rate_rate") +
theme(
plot.title = element_text(color="blue", size=15, face="bold.italic"),
axis.text.x = element_text(angle=60, hjust=1),
axis.title.x = element_text(color="blue", size=15, face="bold"),
axis.title.y = element_text(color="blue", size=15, face="bold")
)
ggplotly(Rape_rate)Robbery_rate <-
ggplot(crime_data, aes(Year, Robbery_rate, group = 1)) +
geom_line(linetype = "dashed", color = "red") +
geom_point()+
ggtitle("Robberly Rate per 100,000 between 1994 - 2018") +
xlab("Years") + ylab("Robberly_Rate") +
theme(
plot.title = element_text(color="blue", size=15, face="bold.italic"),
axis.text.x = element_text(angle=60, hjust=1),
axis.title.x = element_text(color="blue", size=15, face="bold"),
axis.title.y = element_text(color="blue", size=15, face="bold")
)
ggplotly(Robbery_rate)Aggravated_assault_rate <-
ggplot(crime_data, aes(Year, Aggravated_assault_rate, group = 1)) +
geom_line(linetype = "dashed", color = "red") +
geom_point()+
ggtitle("Aggrevated_Assualt per 100,000 growth between 1994 - 2018") +
xlab("Years") + ylab("Aggrevated_Assualt") +
theme(
plot.title = element_text(color="blue", size=15, face="bold.italic"),
axis.text.x = element_text(angle=60, hjust=1),
axis.title.x = element_text(color="blue", size=15, face="bold"),
axis.title.y = element_text(color="blue", size=15, face="bold")
)
ggplotly(Aggravated_assault_rate)The Aggrevated_Assualt per 100,000 has been decreasing over the years between 1994 - 2014 with a small increment between 2014 to 2016. There after the rate decrease between 2016-2018.
Property_crime_rate <-
ggplot(crime_data, aes(Year, Property_crime_rate, group = 1)) +
geom_line(linetype = "dashed", color = "red") +
geom_point()+
ggtitle("Property Crime per 100,000 between 1994 - 2018") +
xlab("Years") + ylab("Property_crime") +
theme(
plot.title = element_text(color="blue", size=15, face="bold.italic"),
axis.text.x = element_text(angle=60, hjust=1),
axis.title.x = element_text(color="blue", size=15, face="bold"),
axis.title.y = element_text(color="blue", size=15, face="bold")
)
ggplotly(Property_crime_rate)Burglary_rate <-
ggplot(crime_data, aes(Year, Burglary_rate, group = 1)) +
geom_line(linetype = "dashed", color = "red") +
geom_point()+
ggtitle("Burglary Rate per 100,000 between 1994 - 2018") +
xlab("Years") + ylab("Burglary_Rate") +
theme(
plot.title = element_text(color="blue", size=15, face="bold.italic"),
axis.text.x = element_text(angle=60, hjust=1),
axis.title.x = element_text(color="blue", size=15, face="bold"),
axis.title.y = element_text(color="blue", size=15, face="bold")
)
ggplotly(Burglary_rate)Larceny_theft_rate <-
ggplot(crime_data, aes(Year, Larceny_theft_rate, group = 1)) +
geom_line(linetype = "dashed", color = "red") +
geom_point()+
ggtitle("Larcency theft per 100,000 between 1994 - 2018") +
xlab("Years") + ylab("Larcency_theft") +
theme(
plot.title = element_text(color="blue", size=15, face="bold.italic"),
axis.text.x = element_text(angle=60, hjust=1),
axis.title.x = element_text(color="blue", size=15, face="bold"),
axis.title.y = element_text(color="blue", size=15, face="bold")
)
ggplotly(Larceny_theft_rate)Motor_vehicle_theft_rate <-
ggplot(crime_data, aes(Year, Motor_vehicle_theft_rate, group = 1)) +
geom_line(linetype = "dashed", color = "red") +
geom_point()+
ggtitle("Motor_vehicle_theft per 100,000 between 1994 - 2018") +
xlab("Years") + ylab("Violent_crime") +
theme(
plot.title = element_text(color="blue", size=15, face="bold.italic"),
axis.text.x = element_text(angle=60, hjust=1),
axis.title.x = element_text(color="blue", size=15, face="bold"),
axis.title.y = element_text(color="blue", size=15, face="bold")
)
ggplotly(Motor_vehicle_theft_rate)AverageCrimeRate <-
ggplot(crime_data, aes(Year, AverageCrimeRate, group = 1)) +
geom_line(linetype = "dashed", color = "red") +
geom_point()+
ggtitle("Average Crime Rate per 100,000 between 1994 - 2018") +
xlab("Years") + ylab("Violent_crime") +
theme(
plot.title = element_text(color="blue", size=15, face="bold.italic"),
axis.text.x = element_text(angle=60, hjust=1),
axis.title.x = element_text(color="blue", size=15, face="bold"),
axis.title.y = element_text(color="blue", size=15, face="bold")
)
ggplotly(AverageCrimeRate)PercentageCrimeRate <-
ggplot(crime_data, aes(Year, PercentageCrimeRate, group = 1)) +
geom_line(linetype = "dashed", color = "red") +
geom_point()+
ggtitle("% Crime Rate between 1994 - 2018") +
xlab("Year") + ylab("Overall % Crime Rate") +
theme(
plot.title = element_text(color="red", size=15, face="bold.italic"),
axis.text.x = element_text(angle=60, hjust=1),
axis.title.x = element_text(color="blue", size=15, face="bold"),
axis.title.y = element_text(color="orange", size=15, face="bold")
)
ggplotly(PercentageCrimeRate)The analysis above leads us to the following conclusions that the average rate of crime has decreased steadily over the period between 1994 and 2018.